
#delimit ;
* Session setup: commands end at a semicolon, any open log is closed, memory is emptied, paging is off;
capture log close;
clear;
set more off;

* Work in the project folder and start a fresh plain-text log;
cd "D:\Data\WorkData\703201\vibeke\GP";
log using "Happy Doctor_Restat_data.txt", replace text;




************************************************************************************;
*** CODE FOR HAPPY DOCTOR MAKES HAPPY BABY *****************************************;
*** ACCEPTED FOR PUBLICATION IN REVIEW OF ECONOMICS AND STATISTICS, APRIL 2013   ***;
************************************************************************************;

************************************************************************************;




* DATA STEP 1: DEFINING THE POPULATION ************;



*DEFINING TREATMENT AND CONTROL AREAS;

* From each annual population register file (1980-1995) keep children born 1981-1992;
forvalues yr = 1980/1995 {;
    use pnr foeddato koen kom using "D:\Data\WorkData\703201\raw\bef`yr'.dta", clear;
    gen yob = year(foeddato);
    gen mob = month(foeddato);
    gen male = (koen == "1");
    keep if inrange(yob, 1981, 1992);
    save pop`yr'.dta, replace;
};

* Stack the annual extracts and keep a single record per person (which record survives is not controlled);
use pop1980.dta, clear;
forvalues yr = 1981/1995 {;
    append using pop`yr'.dta;
};
sort pnr;
drop if pnr == pnr[_n-1];
rename pnr pnrb;

* Municipality group k: 1 for codes up to 147 plus 155 and 185, 2 for the other codes in 148-189, 6 for everything else;
destring kom, replace;
gen k = 1 if kom <= 147;
replace k = 2 if kom > 147 & kom <= 189;
replace k = 1 if inlist(kom, 155, 185);
replace k = 6 if missing(k);
label define muni 1 "CPH CITY" 2 "CPH COUNTY" 6 "NOGROUP", modify;
label values k muni;
save pop.dta, replace;

* The annual extracts are no longer needed;
forvalues yr = 1980/1995 {;
    erase pop`yr'.dta;
};
tab yob;


*I LINK TO THE FERTILITY REGISTER TO DEFINE TIME OF CONCEPTION;
use pop.dta,clear;
* pop.dta stores the child identifier as pnrb (pnr was renamed before the save), so join on the full variable name;
joinby pnrb using "D:\Data\WorkData\703201\raw\fertilitet.dta";
sort pnrb;
keep if pnrb!=pnrb[_n-1];
* deletes infants dying during birth - the comment has to end with the delimiter, otherwise it swallows the drop below;
drop if mdoed=="1";
tab yob;
tab yob if k<=2 & yob>1981 & yob<1991;
keep alderm pnrb pnrm pnrf vaegt foeddato yob mob male svlengde k;
 tab yob, su(svlengde);
 * gestation is capped at 44 - values above 44 and missing values (which compare as larger than any number) become 44;
  replace svlengde=44 if svlengde>44;
  gen xx= svlengde==44 ;
 tab xx; /*6%*/
 * time of conception is date of birth minus gestation length, important for finding the residence of the mother during pregnancy;
 * gestation is multiplied by 7 to express it in days, i.e. svlengde is treated as weeks;
 * capping at 44 gives an early conception date, to be sure that the mother has been living in the area during pregnancy;
 gen concep=foeddato-svlengde*7;
 gen yoc=year(concep);
 gen moc=month(concep);
 * sample window: conceived from October 1981 onwards and born before October 1990;
 keep if yoc>1980 & yob<=1990;
 drop if yoc==1981 & moc<10;
 drop if yob==1990 & mob>=10;
 tab yob;
 * Zone flags the period right after the reform that is later dropped (mixed treatment period);
 * NOTE(review): the two conditions cover births from October 1987 through May 1988 - confirm this is the intended window;
 gen zone=(yob==1987 & mob>=10);
 replace zone=1 if yob==1988 & mob<=5;
* defining the pre-reform period: until october 1, 1987 (mdy(10,1,1987) is the date value 10135);

 gen pre=(foeddato<mdy(10,1,1987));
 replace pre=. if yob>1990 | (yob==1990 & mob>=10);
 * pre1 is the same indicator but additionally undefined for conceptions before October 1984;
 gen pre1=(foeddato<mdy(10,1,1987));
 replace pre1=. if yob>1990 | (yob==1990 & mob>=10);
 replace pre1=. if yoc<1984 | (yoc==1984 & moc<10);
 save fertil.dta,replace;

 
 * DATA STEP 2: DEFINING THE BIOLOGICAL MOTHERS & FATHERS;
 * I use the fertility register for biological instead of the legal parents;
* For every register year 1980-1993 look up each distinct mother and record her year of birth;
forvalues yr = 1980/1993 {;
    use pnrm using fertil.dta, clear;
    sort pnrm;
    drop if pnrm == pnrm[_n-1];
    rename pnrm pnr;
    joinby pnr using "D:\Data\WorkData\703201\raw\bef`yr'.dta", unmatched(master);
    tab _merge;
    drop _merge;
    gen year = `yr';
    gen yobm = year(foeddato);
    rename pnr pnrm;
    keep pnrm yobm year;
    save pnr`yr'.dta, replace;
};

* Stack the yearly lookups into popm.dta and remove the yearly files;
use pnr1980.dta, clear;
forvalues yr = 1981/1993 {;
    append using pnr`yr'.dta;
};
save popm.dta, replace;
forvalues yr = 1980/1993 {;
    erase pnr`yr'.dta;
};

* defining the mothers and year of birth (yobm): keep matched rows with a known birth year, one row per mother;
use pnrm using fertil.dta, clear;
joinby pnrm using popm.dta, unmatched(master);
tab _merge;
drop if _merge != 3;
drop if missing(yobm);
sort pnrm year;
drop if pnrm == pnrm[_n+1];
keep pnrm yobm;
save poppnrm.dta, replace;
erase popm.dta;
* defining the fathers;
* For every register year 1980-1993 look up each distinct father and record his year of birth;
forvalues yr = 1980/1993 {;
    use pnrf using fertil.dta, clear;
    sort pnrf;
    drop if pnrf == pnrf[_n-1];
    rename pnrf pnr;
    joinby pnr using "D:\Data\WorkData\703201\raw\bef`yr'.dta", unmatched(master);
    tab _merge;
    drop _merge;
    gen year = `yr';
    gen yobf = year(foeddato);
    rename pnr pnrf;
    keep pnrf yobf year;
    save pnr`yr'.dta, replace;
};

* Stack the yearly lookups into popf.dta and remove the yearly files;
use pnr1980.dta, clear;
forvalues yr = 1981/1993 {;
    append using pnr`yr'.dta;
};
save popf.dta, replace;
forvalues yr = 1980/1993 {;
    erase pnr`yr'.dta;
};

* One row per father, taken from the last register year - unmatched fathers are kept, so yobf may be missing;
use pnrf using fertil.dta, clear;
joinby pnrf using popf.dta, unmatched(master);
tab _merge;
sort pnrf year;
drop if pnrf == pnrf[_n+1];
keep pnrf yobf;
save poppnrf.dta, replace;
erase popf.dta;

* Combine children with the fertility data and the birth years of both parents;
* the tabulation is repeated after every join to show how many children survive each step;
use pop.dta, clear;
tab yob if k <= 2 & inrange(yob, 1982, 1990);

joinby pnrb using fertil.dta;
tab yob if k <= 2 & inrange(yob, 1982, 1990);

* mothers must be matched, fathers are optional;
joinby pnrm using poppnrm.dta;
tab yob if k <= 2 & inrange(yob, 1982, 1990);

joinby pnrf using poppnrf.dta, unmatched(master);
tab yob if k <= 2 & inrange(yob, 1982, 1990);
tab yob;

* one row per child;
sort pnrb;
drop if pnrb == pnrb[_n-1];
drop _merge;
save poppnrmpnrf.dta, replace;
* parents and child identifier file;

* For every year 1980-1993 look up the municipality code (kom) of each distinct mother;
forvalues yr = 1980/1993 {;
    use pnrm using poppnrmpnrf.dta, clear;
    sort pnrm;
    drop if pnrm == pnrm[_n-1];
    rename pnrm pnr;
    joinby pnr using "D:\Data\WorkData\703201\raw\cpst`yr'.dta", unmatched(master);
    tab _merge;
    drop _merge;
    gen year = `yr';
    rename pnr pnrm;
    keep pnrm year kom;
    save pnr`yr'.dta, replace;
};

* Stack the years and code the municipality group exactly as for the children;
use pnr1980.dta, clear;
forvalues yr = 1981/1993 {;
    append using pnr`yr'.dta;
};
destring kom, replace;
gen k = 1 if kom <= 147;
replace k = 2 if kom > 147 & kom <= 189;
replace k = 1 if inlist(kom, 155, 185);
replace k = 6 if missing(k);
label define muni 1 "CPH" 2 "SUBCPH" 6 "NOGROUP", modify;
label values k muni;
save popm.dta, replace;

forvalues yr = 1980/1993 {;
    erase pnr`yr'.dta;
};


* merge municipality of residence to the year of conception;
* merge municipality of residence to the year of birth;

* i allow mothers to move around within the case/control groups;
* Attach the municipality group of the mother to the year of conception;
* the register record of year Y is assigned to yoc = Y-1;
* NOTE(review): presumably because the register is dated at the start of the year - confirm;
use popm.dta,clear;
gen yoc= year-1;
rename k kyoc;
rename kom komyoc;
* unmatched(using) keeps every child, also those without a municipality record for that year;
joinby pnrm yoc using poppnrmpnrf.dta,unmatched(using);
tab _m;
drop _m;
save tmp.dta,replace;


* Repeat the lookup for the year of birth, using the same Y-1 convention;
use popm.dta,clear;
gen yob=  year-1;
rename k kyob;
rename kom komyob;
joinby pnrm yob using tmp.dta,unmatched(using);
tab _m;
drop _m;
* x flags children whose mother is in a different municipality group at conception and at birth;
gen x=kyob!=kyoc;
tab x;
drop  if kyob!=kyoc;
drop x  kyoc komyoc;
* from here on the group and municipality at birth describe the mother;
rename kyob kpnrm;
rename komyob kompnrm;
* keeps mothers that are in the same municipality group in yoc and yob;
* around 3.9% in total pop is dropped;
save tmp.dta,replace;

* DATA STEP 3: DEFINING CHILD OUTCOME ;
* Reload the raw birth outcomes - this overwrites the fertil.dta that was built earlier;
use pnrb vaegt svlengde mdoed using "D:\Data\WorkData\703201\raw\fertilitet.dta", clear;
save fertil.dta, replace;

* One row per child with identifiers, timing and group variables, then attach the raw outcomes;
use tmp.dta, clear;
keep pnrb pnrm pnrf kompnrm kpnrm yob mob yoc male yobf yobm pre pre1 zone concep;
sort pnrb pnrm;
drop if pnrb == pnrb[_n-1];
joinby pnrb using fertil.dta, unmatched(master);
drop if mdoed == "1"; /* dead infants are deleted*/
tab _merge;

* Children that appear exactly twice after the join are removed (2 duplicates in the data);
sort pnrb;
gen x = (pnrb != "");
egen xx = total(x), by(pnrb);
tab xx;
drop if xx == 2;
drop x xx;

* Birth weight is kept within 500-6000 grams and gestation within 20-44 weeks, everything else is set to missing;
gen bw = vaegt;
replace bw = . if bw < 500 | bw > 6000;
gen svl = svlengde;
replace svl = . if svl > 44;
replace svl = . if svl < 20;

* Infants without a valid birth weight or gestation are deleted - the tabulation shows how many;
gen x = missing(bw, svl);
tab x;
drop if x == 1;

* Continuous outcomes;
gen fg = bw/svl;       /* fetal growth*/
gen lgbw = log(bw);    /* log birth weight*/
gen lgsvl = log(svl);  /* log gestational length*/

* Preterm, very preterm and low birth weight indicators;
gen ptb = (svl < 37);
gen vptb = (svl < 32);
gen lbw = (bw < 2500);

* Birth weight dummies bwd1-bwd8 for the thresholds 2000, 2250, ..., 3750 grams;
forvalues j = 1/8 {;
    gen bwd`j' = (bw < 1750 + 250*`j');
};

* Maternal age at birth and its threshold dummies;
gen agem = yob - yobm;
tab agem;
gen ym = (agem < 26);
gen ym1 = (agem < 20);
gen ym2 = (agem < 22);
gen ym3 = (agem < 24);
gen ym5 = (agem < 28);
gen ym6 = (agem < 30);
gen ym7 = (agem < 32);
gen ym8 = (agem < 34);
gen ym9 = (agem < 36);

* Age group: 1 below 24, 2 for 24-28, 3 for 29-31, 4 for 32 and above (each passed cut point adds one);
gen am = 1 + (agem >= 24) + (agem >= 29) + (agem >= 32);
label define am 1 "<24" 2 "24-28" 3 "29-31" 4 ">31";
label values am am;

* case marks mothers living in municipality group 1;
gen case = (kpnrm == 1);
drop vaegt svlengde x;
save infant.dta, replace;

* DATASTEP 4: DEFINING COVARIATES;
* Count every child each mother has in the fertility register and rank the children by birth date;
use pnrm using infant.dta,clear;
sort pnrm;
keep if pnrm!=pnrm[_n-1];
joinby pnrm using "D:\Data\WorkData\703201\raw\fertilitet.dta",unmatched(master);
tab _m;
drop _m;
* NOTE(review): fodtdato is read from fertilitet.dta and used as the birth date for ordering - confirm the variable name;
keep mdoed pnrm pnrb  fodtdato;
drop if mdoed=="1";
gen x=pnrb!="";
egen nsib=sum(x), by (pnrm); /* define number of sibs*/
drop x;
replace nsib=4 if nsib>4; /* number of sibs more than 4 --> one category*/
sort pnrm fodtdato;
by pnrm: gen oob=_n;
replace oob=8 if oob>8; /* order of birth, more than 8 --> one category*/
keep nsib oob pnrb; 
save tmp.dta,replace;


* Attach sibling count and birth order to the infant file (the _merge left by the outcome join is dropped first);
use infant.dta,clear;
drop _m;
joinby pnrb using tmp.dta,unmatched(master);
tab _m; 
label define nsib 1 "only child" 2 "two sibs" 3 "3 sibs" 4 "4+ sibs";
* attach the value label to nsib (label variable would only set the variable label to the text nsib);
label values nsib nsib;
save infant.dta,replace;


*defining ethnicity for the mother;
* For every year 1980-1995 read the origin code (ielandg2) of each mother and turn it into three indicators;
forvalues yr = 1980/1995 {;
    use pnrm using infant.dta, clear;
    rename pnrm pnr;
    joinby pnr using "D:\Data\WorkData\703201\raw\iepe`yr'.dta", unmatched(master);
    drop _merge;
    keep pnr ielandg2;
    gen year = `yr';
    gen dane = (ielandg2 == "1");
    gen west = (ielandg2 == "2");
    gen nowest = (ielandg2 == "3");
    save imigr`yr'.dta, replace;
};

* Stack the years and keep the last record per mother in pnr, year, ielandg2 order;
use imigr1980.dta, clear;
forvalues yr = 1981/1995 {;
    append using imigr`yr'.dta;
};
sort pnr year ielandg2;
drop if pnr == pnr[_n+1];
keep pnr dane west nowest;
rename pnr pnrm;
save imigr.dta, replace;
forvalues yr = 1980/1995 {;
    erase imigr`yr'.dta;
};

* Attach the indicators to the infant file (the _merge left by the sibling join is dropped first);
use infant.dta, clear;
drop _merge;
joinby pnrm using imigr.dta, unmatched(master);
tab _merge;
drop _merge;
save infant.dta, replace;



* parental education ;
* Education (hfpria) of the mother (m) and then the father (f) for every year 1981-1993;
* the register year is stored as yob so that education is matched to the year of birth of the child;
* education is recoded to a minimum of 84 months and a maximum of 216 months;
foreach p in m f {;
    forvalues yr = 1981/1993 {;
        use pnr`p' using infant.dta, clear;
        sort pnr`p';
        drop if pnr`p' == pnr`p'[_n-1];
        rename pnr`p' pnr;
        joinby pnr using "D:\Data\WorkData\703201\raw\udda`yr'.dta";
        gen yob = `yr';
        keep pnr hfpria yob;
        rename pnr pnr`p';
        rename hfpria udl`p';
        destring udl`p', replace;
        recode udl`p' min/84=84 216/max=216;
        save ud`yr'.dta, replace;
    };
    use ud1981.dta, clear;
    forvalues yr = 1982/1993 {;
        append using ud`yr'.dta;
    };
    save ud`p'.dta, replace;
};

* Attach both education variables to the infant file, keeping children without a match;
use infant.dta, clear;
joinby pnrm yob using udm.dta, unmatched(master);
tab _merge;
drop _merge;
joinby pnrf yob using udf.dta, unmatched(master);
tab _merge;
drop _merge;
save infant1.dta, replace;


* parental income and labor market attachment/occupation;
* Gross income (brutto) of the mother (m) and then the father (f) for every year 1980-1992;
* the register year is stored as yoc so that income is matched to the year of conception;
* negative incomes are set to 1, then log income and income quartiles over the stacked years are computed;
foreach p in m f {;
    forvalues yr = 1980/1992 {;
        use pnr`p' using infant.dta, clear;
        sort pnr`p';
        drop if pnr`p' == pnr`p'[_n-1];
        rename pnr`p' pnr;
        joinby pnr using "D:\Data\WorkData\703201\raw\indh`yr'.dta";
        gen yoc = `yr';
        keep pnr brutto yoc;
        rename pnr pnr`p';
        destring brutto, replace;
        replace brutto = 1 if brutto < 0 & !missing(brutto);
        rename brutto incpnr`p';
        save ind`yr'.dta, replace;
    };
    use ind1980.dta, clear;
    forvalues yr = 1981/1992 {;
        append using ind`yr'.dta;
    };
    gen linc`p' = log(incpnr`p');
    xtile qinc`p' = incpnr`p', nq(4);
    keep pnr`p' qinc`p' linc`p' yoc;
    save indk`p'.dta, replace;
};

* Attach both income measures to the analysis file, keeping children without a match;
use infant1.dta, clear;
joinby pnrm yoc using indkm.dta, unmatched(master);
tab _merge;
drop _merge;
joinby pnrf yoc using indkf.dta, unmatched(master);
tab _merge;
drop _merge;
save infant1.dta, replace;


* Occupation indicator of the mother (o4m) and then the father (o4f) for every year 1980-1992;
* the register year is stored as yoc so that occupation is matched to the year of conception;
* the indicator is 1 when the position code pstill is 40 or above;
* a missing pstill compares as larger than any number, so the indicator is left missing in that case;
* the final data step then flags it as missing and sets it to zero, like every other missing covariate;
foreach p in m f {;
    forvalues yr = 1980/1992 {;
        use pnr`p' using infant.dta, clear;
        sort pnr`p';
        keep if pnr`p' != pnr`p'[_n-1];
        rename pnr`p' pnr;
        joinby pnr using "D:\Data\WorkData\703201\raw\idap`yr'.dta";
        gen yoc = `yr';
        keep pnr pstill yoc;
        rename pnr pnr`p';
        destring pstill, replace;
        gen o4`p' = (pstill >= 40) if !missing(pstill);
        drop pstill;
        save ocu`yr'.dta, replace;
    };
    use ocu1980.dta, clear;
    forvalues yr = 1981/1992 {;
        append using ocu`yr'.dta;
    };
    save ocu`p'.dta, replace;
};

* Attach both indicators to the analysis file, keeping children without a match;
use infant1.dta, clear;
joinby pnrm yoc using ocum.dta, unmatched(master);
tab _merge;
drop _merge;
joinby pnrf yoc using ocuf.dta, unmatched(master);
tab _merge;
drop _merge;
save infant1.dta, replace;

* Marital status ;
* Family type of the mother for every year 1980-1992, matched to the year of conception;
* singl is 1 when familie_type equals 5;
forvalues yr = 1980/1992 {;
    use pnrm using infant.dta, clear;
    sort pnrm;
    drop if pnrm == pnrm[_n-1];
    rename pnrm pnr;
    joinby pnr using "D:\Data\WorkData\703201\raw\bef`yr'.dta";
    gen yoc = `yr';
    keep pnr familie_type yoc;
    rename pnr pnrm;
    destring familie_type, replace;
    gen singl = (familie_type == 5);
    drop familie_type;
    save mar`yr'.dta, replace;
};

use mar1980.dta, clear;
forvalues yr = 1981/1992 {;
    append using mar`yr'.dta;
};
save marm.dta, replace;

* Attach the indicator to the analysis file, keeping children without a match;
use infant1.dta, clear;
joinby pnrm yoc using marm.dta, unmatched(master);
tab _merge;
drop _merge;
save infant1.dta, replace;

* Remove the yearly income, occupation and marital status extracts (written for 1980-1992);
forvalues yr = 1980/1992 {;
    erase ind`yr'.dta;
    erase ocu`yr'.dta;
    erase mar`yr'.dta;
};

* Remove the stacked intermediate files;
erase udm.dta;
erase udf.dta;
erase marm.dta;
erase indkf.dta;
erase indkm.dta;
erase imigr.dta;
erase ocuf.dta;
erase ocum.dta;
erase fertil.dta;
erase poppnrmpnrf.dta;
erase pop.dta;
erase tmp.dta;

* The yearly education extracts were written for 1981-1993, so erase through 1993 to avoid leaving ud1993.dta behind;
forvalues yr = 1981/1993 {;
    erase ud`yr'.dta;
};





* Final analysis file: restrict the sample, flag and fill missing covariates, add GP information and create dummy sets;
use infant1.dta,clear;
* keep children with a defined pre-reform indicator whose mother is in municipality group 1 or 2;
drop if pre==.;
drop if kpnrm==6;
* tabulate the number of missing values for each analysis variable (x is a temporary flag);
for Z in any  yobm singl o4f o4m qincf lincf qincm lincm udlf udlm nowest west dane oob nsib case am ym9 ym8 ym7 ym6 ym5 ym3 ym2 ym1 ym agem bwd8 bwd7 bwd6 bwd5 bwd4 bwd3 bwd2 bwd1 lbw vptb ptb lgsvl lgbw fg svl bw  yobf yobm pre pre1 zone concep male  yoc yob kpnrm: 
\gen x=Z==.
\tab x
\drop x;
* count empty parent identifiers - the father flag is kept as mpnrf;
gen x=pnrm=="";
tab x;
gen y =pnrf=="";
tab y;
rename y mpnrf;
drop x;
* income quartile dummies qm1-qm4 and qf1-qf4 (dropped and regenerated further down);
for Z in any m f: tab qincZ,gen(qZ);
* missing-value flags named m plus the variable name, e.g. mo4f and mudlm;
for Z in any o4f o4m  singl udlf udlm qincm qincf: gen mZ=(Z==.);
for Z in any lincf lincm o4f o4m  singl: replace Z=0 if Z==.;
*Recode variables to dummies. Replaces the few missing values with zero, to avoid losing persons. ;
*for Z in any f m: replace qZ1=1 if qincZ==.;
*replaces missings with the lowest income group;
for Z in any udlf udlm: replace Z=84 if Z==.;
* replace missing education with the lowest level;
* NOTE(review): kpnrm is built from k, which is only ever coded 1, 2 or 6, so this replace looks like a no-op - confirm;
replace case=1 if kpnrm==3;
* interaction of the pre-reform indicator and the case indicator;
gen preca=(pre*case);

joinby kompnrm yob using "D:\Data\WorkData\703201\Vibeke\GP\gpx.dta",unmatched(master);
* add info on the GPs - gender, age and age^2;
tab _merge;
* missing GP variables are set to zero;
for Z in any   Docsex Docage Docagesq: replace Z =0 if Z==.;
drop _merge;


* year-of-birth and month-of-birth dummies;
tab yob, gen(yb);
tab mob, gen(mb);
* the quartile dummies created above are dropped here and rebuilt right below;
drop qf* qm*;

tab qincf,gen(qf);
tab qincm,gen(qm);
for Z in num 1/4: 
\replace qmZ=0 if qmZ==.
\replace qfZ=0 if qfZ==.;
* place missing info into the reference group for income;
* mqm and mqf repeat the flags mqincm and mqincf that were created above;
gen mqm=(qincm==.);
gen mqf=(qincf==.);
*define a variable for missing income;
tab am,gen(amx);

tab nsib, gen(sb);
*drop yb5 mb9 amx2 qf1 qm1 sb2;
save infant2.dta,replace;

* Close the log opened at the top of the file, empty memory and leave Stata;
* exit takes STATA as an option after the comma - a bare word after exit is parsed as an expression;
capture log close;
clear all;
exit, clear STATA;










